
####
# R-script performing all estimates of AKM model, variance decomposition and tests
# New version using functions 
# Author: Thiago Scot
# This version: Apr 2021
####

#### load necessary packages & define paths ####

if (!require("pacman")) install.packages("pacman")
pacman::p_load(lfe, readr,xtable, dplyr, tidyr, stringr,splitstackshape,tictoc,
               gplots, apcluster,ggplot2,igraph,data.table, parallel, pbapply, future, future.apply,furrr)

set.seed(12345)         # Set seed for reproducibility of split-sample variance estimation

path = "/Users/thiagoscott/Dropbox/Juizes Brasil/Paper_Selection/submissions/Restat_Submission/acceptance/materials/"
out = paste0(path, "out")

####
#### DEFINING FUNCTIONS ####
###

#Perform AKM
akm_simple = function (out_var, covar ,feffects, dt, get_fe = F) {
  
  #First step: compute CS, keep only largest within each state
  dt_factor = dt %>% 
    mutate(Judge_idP = factor(Judge_idP), Court_idP2 = factor(Court_idP2))
  
  unique_pairs = dt_factor %>% 
    select(Judge_idP,Court_idP2) %>% 
    unique()
  
  cs_list = compfactor(unique_pairs)
  cs_dt = data.frame(unique_pairs,cs_list)
  
  dt_cs = dt_factor %>% 
    left_join(cs_dt, by = c('Judge_idP', 'Court_idP2')) %>% 
    group_by(cs_list) %>% 
    mutate(number_obs_cs = n()) %>% 
    group_by(State) %>% 
    mutate(max_number = (number_obs_cs == max(number_obs_cs)))
  
  dt_largest_cs = dt_cs %>% 
    filter(max_number == TRUE)
  
  judges_stats = dt_largest_cs %>% as.data.table() %>% 
    .[,.N, c('Judge_idP', 'Court_idP2')] %>% 
    .[,.N,'Judge_idP'] %>% 
    .[, .(number = .N, share = mean(N>1))]
  
  #Second step: Estimate AKM model
  controls = paste(covar, collapse = " + ")
  esp = paste(out_var, controls, sep = " ~ ")
  fe = paste(feffects, collapse = " + ")
  form = as.formula(paste(esp, fe,sep = " | "))

  model = felm(form ,data = dt_largest_cs, keepX = TRUE)
    
  model_stats = summary(model)
  
  if (get_fe == TRUE) {
    print('Computing fixed-effects')
    results = getfe(model, ef = "zm2", se = TRUE, bN = 10)
    
    return(list(model,model_stats,results,judges_stats))
  }
  
  else {
    return(list(model,model_stats,judges_stats))
  }
  
}

#Function to extract and clean FE
obtain_FE = function(results_dt, original_dt)  {
  
  #First step: compute CS, keep only largest within each state
  dt_factor = original_dt %>% 
    mutate(Judge_idP = factor(Judge_idP), Court_idP2 = factor(Court_idP2))
  
  unique_pairs = dt_factor %>% 
    select(Judge_idP,Court_idP2) %>% 
    unique()
  
  cs_list = compfactor(unique_pairs)
  cs_dt = data.frame(unique_pairs,cs_list)
  
  dt_cs = dt_factor %>% 
    left_join(cs_dt, by = c('Judge_idP', 'Court_idP2')) %>% 
    group_by(cs_list) %>% 
    mutate(number_obs_cs = n()) %>% 
    group_by(State) %>% 
    mutate(max_number = (number_obs_cs == max(number_obs_cs))) %>% 
    ungroup() %>% 
    select(Judge_idP, Court_idP2, max_number)
  
  dt_largest_cs = dt_cs %>% 
    filter(max_number == TRUE) %>% 
    unique(by = c('Judge_idP', 'Court_idP2'))
  
  results_dt$names = rownames(results_dt)
  results_dt = results_dt %>% 
    separate(names, c("fe", "idx"), "\\.")
  
  judge_FE = results_dt %>% filter(fe == "Judge_idP") %>% 
    select(Judge_idP = idx, 
           Judge_FE = effect, 
           Judge_FE_se = se,
           CS_id = comp) 
  
  #Collecting Court FEs
  court_FE = results_dt %>% 
    filter(fe ==  'Court_idP2') %>% 
    select(Court_idP2 = idx, 
           Court_FE = effect, 
           Court_FE_se = se)
  
  #Collecting CS FEs
  CS_FE = results_dt %>% 
    filter(fe == 'icpt') %>% 
    select(CS_id = idx, 
           CS_FE = effect, 
           CS_FE_se = se) 
  
  final_FE = dt_largest_cs %>% 
    left_join(judge_FE, by = 'Judge_idP') %>% 
    left_join(court_FE, by = 'Court_idP2') %>% 
    left_join(CS_FE, by = 'CS_id')
  
  return = final_FE
}


obtain_Judge_FE = function(results_dt)  {
  
  results_dt$names = rownames(results_dt)
  results_dt = results_dt %>% 
    separate(names, c("fe", "idx"), "\\.")
  
  judge_FE = results_dt %>% filter(fe == "Judge_idP") %>% 
    select(Judge_idP = idx, 
           Judge_FE = effect) %>% 
    mutate(Judge_idP = as.integer(Judge_idP))

  return = judge_FE
}

get_variance_split = function(dataset) {
  
  stats_full = akm_simple('sentence_IHS',
                          controls,
                          c('Court_idP2','Judge_idP','YM'),
                          dataset)
  
  #Split sample randomly in two components by Judge-court pairs
  out <- stratified(dataset,c('Judge_idP', 'Court_idP2'), .5,bothSets = TRUE )
  
  #Compute fixed effects in two samples using parallel computing
  model_split = future_map(out, ~akm_simple('sentence_IHS',
                                            controls,
                                            c('Court_idP2','Judge_idP','YM'),
                                            get_fe = TRUE,
                                            dt = .),
                           seed = TRUE,
                           .progress = TRUE)
  
  FE_Split1= obtain_FE(model_split$SAMP1[[3]], out$SAMP1)
  FE_Split2= obtain_FE(model_split$SAMP2[[3]],out$SAMP2)
  
  var = dataset %>%
    mutate(Judge_idP = as.character(Judge_idP),
           Court_idP2 = as.character(Court_idP2)) %>% 
    left_join(FE_Split1 %>% 
                rename_with(~paste0(.,"_Split1"),-c('Judge_idP', 'Court_idP2')), 
              by = c('Judge_idP', 'Court_idP2')) %>% 
    left_join(FE_Split2 %>% 
                rename_with(~paste0(.,"_Split2"),-c('Judge_idP', 'Court_idP2')), 
              by = c('Judge_idP', 'Court_idP2')) %>% 
    filter(!is.na(Judge_FE_Split1) & !is.na(Judge_FE_Split2)) 
  
  var_split = c(var(var$sentence_IHS),
                cov(var$Judge_FE_Split1,var$Judge_FE_Split2)/var(var$sentence_IHS), 
                cov(var$Court_FE_Split1,var$Court_FE_Split2)/var(var$sentence_IHS), 
                cov(var$CS_FE_Split1,var$CS_FE_Split2)/var(var$sentence_IHS), 
                (cov(var$Judge_FE_Split1 + var$Court_FE_se_Split1,
                     var$Judge_FE_Split2 + var$Court_FE_se_Split2) + 
                   cov(var$CS_FE_Split1,var$CS_FE_Split2))/var(var$sentence_IHS),
                stats_full[[2]]$adj.r.squared,
                stats_full[[2]]$N,
                stats_full[[3]][[1]],
                stats_full[[3]][[2]])
  
  return(var_split)
}

############################ LOAD DATA AND COMPUTE AKM ANALYSES ################################################

#Loads Stata dataset
dataset_estimate <- fread(paste0(path,"temp/Intermed_FE_R.csv"))  %>% 
  select(-c(Judge, Court_name))
dataset_full <- fread(paste0(path,"temp/Intermed_FE_FULL_SAMPLE_R.csv"))  %>% 
  select(-c(Judge, Court_name))
dataset_cases_SP <- fread(paste0(path,"input/BancoJulgados_AKM.csv"))  %>% 
  select(-c(magistrado, comarca, foro, vara))

#Unique pairs of Judge_court on data
unique_pairs = dataset_estimate %>% 
  select(Judge_idP, Court_idP2) %>% 
  unique(by = c('Judge_idP', 'Court_idP2')) %>% 
  mutate(Judge_idP = as.character(Judge_idP),
         Court_idP2 = as.character(Court_idP2))

#Set up set of controls used in all regressions
controls = c('total_courts_overall','total_judges_overall')

# 1.Goodness-of-fit: Model with no Judge FE ----
model_NoJudge = akm_simple('sentence_IHS',
                           controls,
                           c('Court_idP2','YM'),
                           dataset_estimate)
stats_NoJudge = c(model_NoJudge[[2]]$r.squared, model_NoJudge[[2]]$adj.r.squared, 
                  model_NoJudge[[2]]$rse, model_NoJudge[[2]]$N)

#2.Baseline model: including Judges FE ----
model_Judge = akm_simple('sentence_IHS',
                         controls,
                         c('Court_idP2','Judge_idP','YM'),
                         dataset_estimate,
                         get_fe = TRUE)
stats_Judge  = c(model_Judge[[2]]$r.squared, model_Judge[[2]]$adj.r.squared, 
                 model_Judge[[2]]$rse, model_Judge[[2]]$N)
#F-test of joint significance of judges FE
f = ((sum(model_NoJudge[[2]]$residuals^2) - sum(model_Judge[[2]]$residuals^2))/sum(model_Judge[[2]]$residuals^2))/((model_NoJudge[[2]]$rdf - model_Judge[[2]]$rdf)/model_Judge[[2]]$rdf)

FE_Judges = obtain_FE(model_Judge[[3]], dataset_estimate)
Baseline_FE = obtain_Judge_FE(model_Judge[[3]])

dataset_estimate[,Residuals_main := model_Judge[[2]]$residuals]

#3.Goodness-of-fit: Model with Judge-by-court FE ----
model_JudgeCourt = akm_simple('sentence_IHS',
                              controls,
                           c('Judge_Court_idP','YM'),
                           dataset_estimate)
stats_JudgeCourt = c(model_JudgeCourt[[2]]$r.squared, model_JudgeCourt[[2]]$adj.r.squared, 
                     model_JudgeCourt[[2]]$rse, model_JudgeCourt[[2]]$N)

#4. Using Hearings as LHS variable ----
model_Hearings = akm_simple('hearing_IHS',
                            controls,
                         c('Court_idP2','Judge_idP','YM'),
                         dataset_estimate,
                         get_fe = TRUE)
FE_Hearings = obtain_FE(model_Hearings[[3]], dataset_estimate)

#5. Using Process Waiting > 100 days as LHS variable ----
model_Await = akm_simple('await_100days_IHS',
                         controls,
                            c('Court_idP2','Judge_idP','YM'),
                            dataset_estimate,
                            get_fe = TRUE)
FE_Await = obtain_FE(model_Await[[3]], dataset_estimate)

#6. Using Extintion of punishment as LHS variable ----
model_Extinction = akm_simple('extinction_IHS',
                              controls,
                         c('Court_idP2','Judge_idP','YM'),
                         dataset_estimate,
                         get_fe = TRUE)
FE_Extinction= obtain_FE(model_Extinction[[3]], dataset_estimate)

#7. Using Average court appeal cases as LHS variable ----
model_Appeals = akm_simple('court_appeal_IHS',
                           controls,
                              c('Court_idP2','Judge_idP','YM'),
                              dataset_estimate,
                              get_fe = TRUE)
FE_Appeals= obtain_FE(model_Appeals[[3]], dataset_estimate)


#7. Using Average court appeal cases as LHS variable ----
dataset_big = dataset_estimate %>% filter(judges_month > 1)
model_big = akm_simple('court_appeal_IHS',
                           controls,
                           c('Court_idP2','Judge_idP','YM'),
                           dataset_big,
                           get_fe = TRUE)
FE_bigCourt= obtain_FE(model_big[[3]], dataset_big)

#8 Estimating model using case level data for SP
model_duration = akm_simple('duration_main',
                            controls,
                           c('Court_idP2','Judge_idP','classe'),
                           dataset_cases_SP,
                           get_fe = TRUE)
FE_SP= obtain_Judge_FE(model_duration[[3]])
model_logduration = akm_simple('log_duration',
                               controls,
                            c('Court_idP2','Judge_idP','classe'),
                            dataset_cases_SP,
                            get_fe = TRUE)
FE_SP_log= obtain_Judge_FE(model_logduration[[3]])

compare_SP = FE_SP %>% left_join(FE_SP_log %>% rename(SPlog_FE = Judge_FE), by = 'Judge_idP') %>% 
  left_join(Baseline_FE %>% rename(baseline_FE = Judge_FE), by = 'Judge_idP')

fwrite(compare_SP, file =  paste0(path,"temp/FE_SP_data.csv"))

#### Table 2: Goodness of fit measures ####
aux_table = list(NoJudge = stats_NoJudge,Judge = stats_Judge, JudgeCourt = stats_JudgeCourt) %>% 
  as.data.frame() %>% 
  round(3) %>% 
  rbind(c('No', 'Yes', 'No')) %>% 
  rbind(c('No', 'No', 'Yes'))
rownames(aux_table) = c('R-squared','Adjusted R-squared','Residual Standard Error (RSE)','Observations',
                       'Judge FE','Judge-by-Court FE')
colnames(aux_table) = c("(1)","(2)","(3)")

print(xtable(aux_table, type = "latex", align = c("l","c", "c","c"), auto = TRUE),
      floating = FALSE, 
      booktabs= TRUE,
      format.args = list(big.mark = ",", decimal.mark = "."),
      hline.after = c(-1,0,3,6),
      file = paste0(path,"output/tables/goodness_fit.tex"))


#####  VARIANCE SHRINKING STRATEGY: SPLIT SAMPLE ##### 
#Randomly splits sample in two halfs
out <- stratified(dataset_estimate,c('Judge_idP', 'Court_idP2'), .5,bothSets = TRUE )

model_split1 = akm_simple('sentence_IHS',
                          controls,
                         c('Court_idP2','Judge_idP','YM'),
                         out$SAMP1,
                         get_fe = TRUE)
FE_Split1= obtain_FE(model_split1[[3]], dataset_estimate)

model_split2 = akm_simple('sentence_IHS',
                          controls,
                         c('Court_idP2','Judge_idP','YM'),
                         out$SAMP2,
                         get_fe = TRUE)
FE_Split2= obtain_FE(model_split2[[3]], dataset_estimate)

###Prepare tables to export FE back to Stata
dt_FixedEffects = unique_pairs %>% 
  left_join(FE_Judges, by = c('Judge_idP', 'Court_idP2')) %>% 
  left_join(FE_Hearings %>% 
              rename_with(~paste0(.,"_Hear"),-c('Judge_idP', 'Court_idP2')),
            by = c('Judge_idP', 'Court_idP2')) %>% 
  left_join(FE_Split1 %>% 
              rename_with(~paste0(.,"_Split1"),-c('Judge_idP', 'Court_idP2')),
            by = c('Judge_idP', 'Court_idP2')) %>%
  left_join(FE_Split2 %>% 
              rename_with(~paste0(.,"_Split2"),-c('Judge_idP', 'Court_idP2')),
            by = c('Judge_idP', 'Court_idP2')) %>% 
  left_join(FE_Await %>% 
              rename_with(~paste0(.,"_Await"),-c('Judge_idP', 'Court_idP2')),
            by = c('Judge_idP', 'Court_idP2')) %>% 
  left_join(FE_Extinction %>% 
              rename_with(~paste0(.,"_Extinct"),-c('Judge_idP', 'Court_idP2')),
            by = c('Judge_idP', 'Court_idP2')) %>% 
  left_join(FE_Appeals %>% 
              rename_with(~paste0(.,"_Appeal"),-c('Judge_idP', 'Court_idP2')),
            by = c('Judge_idP', 'Court_idP2')) %>%
  mutate(Judge_idP = as.integer(Judge_idP),
         Court_idP2 = as.integer(Court_idP2))

fwrite(dt_FixedEffects,
      file = paste0(path, "temp/Intermed_FE_All.csv"))


#FE for big courts
dt_FixedEffects_big = unique_pairs %>% 
  left_join(FE_bigCourt %>% 
              rename_with(~paste0(.,"_bigCourt"),-c('Judge_idP', 'Court_idP2')),
            by = c('Judge_idP', 'Court_idP2')) 

fwrite(dt_FixedEffects_big,
       file = paste0(path, "temp/Intermed_FE_bigCourt.csv"))

#### Variance Estimation ####
dataset_estimate_final = dataset_estimate %>% 
  left_join(dt_FixedEffects, 
            by = c('Judge_idP', 'Court_idP2'))


####### Table 3: Variance decomposition 

#Raw variance column
var_raw = c(var(dataset_estimate_final$sentence_IHS), 
            var(dataset_estimate_final$Judge_FE), 
            var(dataset_estimate_final$Court_FE), 
            var(dataset_estimate_final$CS_FE), 
            var(dataset_estimate_final$Judge_FE + dataset_estimate_final$Court_FE) + 
              var(dataset_estimate_final$CS_FE, na.rm = TRUE))
#Split-variance column
var_split = c(var(dataset_estimate_final$sentence_IHS),
              cov(dataset_estimate_final$Judge_FE_Split1,dataset_estimate_final$Judge_FE_Split2), 
              cov(dataset_estimate_final$Court_FE_Split1,dataset_estimate_final$Court_FE_Split2), 
              cov(dataset_estimate_final$CS_FE_Split1,dataset_estimate_final$CS_FE_Split2), 
              cov(dataset_estimate_final$Judge_FE_Split1 + dataset_estimate_final$Court_FE_se_Split1 + dataset_estimate_final$CS_FE_Split1,
                  dataset_estimate_final$Judge_FE_Split2 + dataset_estimate_final$Court_FE_se_Split2) + 
                cov(dataset_estimate_final$CS_FE_Split1,dataset_estimate_final$CS_FE_Split2))

var_split_share = var_split/var(dataset_estimate_final$sentence_IHS)

aux_vartable = list(raw = var_raw,split = var_split, share = var_split_share) %>% 
  as.data.frame() %>% 
  round(2)
rownames(aux_vartable) = c('Cases disposed (IHS)','Judge FE ','Court FE ','Connected Set FE ','Judge+Court FE')
colnames(aux_vartable) = c('Raw Variance','Split Sample Variance','Split sample Var - % Total')

print(xtable(aux_vartable, type = "latex", align = c("l","c","c","c")), 
      floating = FALSE, 
      booktabs = TRUE, 
      file = paste0(path,"output/tables/variance_R.tex"))


#### Figure 2: Residuals heatmap from two-way fixed-effects model ####
dataset_estimate_final = dataset_estimate_final %>% 
  mutate(quant_judge = cut(Judge_FE, breaks = quantile(Judge_FE, probs = seq(0, 1, 0.05)), 
                           include.lowest = TRUE, 
                           labels = 1:20),
         quant_court = cut(Court_FE, breaks = quantile(Court_FE, probs = seq(0, 1, 0.05), na.rm = TRUE),
                           include.lowest = TRUE, 
                           labels = 1:20) )

heat_data = dataset_estimate_final %>% 
  select(quant_judge, quant_court, Residuals_main) %>% 
  group_by(quant_judge, quant_court) %>% 
  summarise(mean = mean(Residuals_main))
  
#Printing residuals' heatmap
residuals_heatmap = ggplot(data = heat_data, aes(x=quant_judge, y=quant_court, fill=mean)) + 
  geom_tile() + 
  scale_fill_gradient2(low = "blue", high = "red", mid = "white", 
                                     midpoint = median(heat_data$mean), limit = c(min(heat_data$mean),max(heat_data$mean)), 
                                     space = "Lab",  name="Mean residual") + 
  xlab('Vingtiles of Judge FE') + 
  ylab('Vingtiles of Court FE')
ggsave(paste0(path,"output/figures/residual_heatmap.pdf"),width = 6, height = 4)

#Same graph but in BW for publication
residuals_heatmap = ggplot(data = heat_data, aes(x=quant_judge, y=quant_court, fill=mean)) + 
  geom_tile() + 
  scale_fill_gradient2(low = "black", high = "white",
                       midpoint = median(heat_data$mean), limit = c(min(heat_data$mean),max(heat_data$mean)), 
                       space = "Lab",  name="Mean residual") + 
  xlab('Vingtiles of Judge FE') + 
  ylab('Vingtiles of Court FE')
ggsave(paste0(path,"output/figures/residual_heatmap_bw.pdf"),width = 6, height = 4)

###Simulated heatmap for misspecified model
x <- rnorm(10000)
x2 <- rnorm(length(x))
id <- factor(sample(200,length(x),replace=TRUE))
firm <- factor(sample(50,length(x),replace=TRUE))
id.eff <- rnorm(nlevels(id),mean = 4, sd = 1)
firm.eff <- rnorm(nlevels(firm),mean = 4, sd = 1)

y <- x + 0.25*x2 + id.eff[id] + firm.eff[firm] + id.eff[id]*firm.eff[firm] + rnorm(length(x))

simul <- felm(y ~ x+x2 | id + firm )
residual_simul = simul$residuals     #Assign residuals to dataset
values = list( res = residual_simul, id =  id.eff[id] ,firm = firm.eff[firm])

data_simul = data.frame(values)
#### Heatmap of residuals ####
data_simul['quant_id']  = cut(data_simul$id, breaks = quantile(data_simul$id, probs = seq(0, 1, 0.05)), include.lowest = TRUE, labels = 1:20) 
data_simul['quant_firm']  = cut(data_simul$firm, breaks = quantile(data_simul$firm, probs = seq(0, 1, 0.05)), include.lowest = TRUE, labels = 1:20) 

heatmap_simul = data_simul[c('quant_id','quant_firm','y')]

table = heatmap_simul %>% group_by(quant_id,quant_firm) %>% summarise(mean = mean(y))

simulation_heatmap = ggplot(data = table, aes(x=quant_id, y=quant_firm, fill=mean)) + 
  geom_tile() + 
  scale_fill_gradient2(low = "blue", high = "red", mid = "white", 
                       midpoint = median(table$mean), limit = c(min(table$mean),max(table$mean)), space = "Lab",  name="Mean residual") + 
  xlab('Vingtiles of Judge FE') + ylab('Vingtiles of Court FE')

simulation_heatmap
ggsave(paste0(path,"output/figures/simulation_heatmap.pdf"),width = 6, height = 4)

#Same graph but in BW for publication
simulation_heatmap = ggplot(data = table, aes(x=quant_id, y=quant_firm, fill=mean)) + 
  geom_tile() + 
  scale_fill_gradient2(low = "black", high = "white",
                       midpoint = median(table$mean), limit = c(min(table$mean),max(table$mean)), space = "Lab",  name="Mean residual") + 
  xlab('Vingtiles of Judge FE') + ylab('Vingtiles of Court FE')

simulation_heatmap
ggsave(paste0(path,"output/figures/simulation_heatmap_bw.pdf"),width = 6, height = 4)

plan(multiprocess(workers=detectCores()-2))

###### Table A3: Variance decomposition - alternative samples

var_baseline = get_variance_split(dataset_full %>% filter(duration_spell_tot >= 3))
var_full = get_variance_split(dataset_full)
var_4 = get_variance_split(dataset_full %>% filter(duration_spell_tot >= 4))
var_tot_6 = get_variance_split(dataset_full %>% filter(period_by_judge_court_all >= 6))

var_table = cbind(var_baseline, var_full, var_4, var_tot_6)
var_table_final = var_table %>% 
  as.data.frame()
rownames(var_table_final) = c('Cases disposed (IHS)','Judge FE ','Court FE ','Connected Set FE ',
                              'Judge+Court FE', 'Adju R-squared', 'Observations', 'Number Judges', 'Share movers')
colnames(var_table_final) = c('Baseline','Full sample','4-month spell', '6-month total')

print(xtable(var_table_final, 
             type = "latex", 
             align = c("l","c","c","c", "c"),
             digits = matrix(c(2,2,2,2,2,2,0,0,2), nrow(var_table_final), ncol(var_table_final)+1)), 
      floating = FALSE, 
      booktabs = TRUE, 
      format.args = list(big.mark = ",", decimal.mark = "."),
      hline.after = c(-1,0,6,9),
      file = paste0(path,"output/tables/variance_R_alternative.tex"))

### Run AKM on alternative samples (keeping different spells of Judge-court match)
dataset_full = dataset_full %>% group_by(Judge_idP, Court_idP2) %>% 
  mutate(max_j_c = max(months_judge_court),
         months_judge_court_rev = max_j_c - months_judge_court)

model_full = akm_simple('sentence_IHS',
                            controls,
                            c('Court_idP2','Judge_idP','YM'),
                            dataset_full,
                            get_fe = TRUE)
FE_full = obtain_FE(model_full[[3]], dataset_full)

model_spell4 = akm_simple('sentence_IHS',
                          controls,
                          c('Court_idP2','Judge_idP','YM'),
                          dataset_full%>% filter(duration_spell_tot >= 4),
                          get_fe = TRUE)
FE_spell4 = obtain_FE(model_spell4[[3]], dataset_full%>% filter(duration_spell_tot >= 4))

model_total6 = akm_simple('sentence_IHS',
                          controls,
                          c('Court_idP2','Judge_idP','YM'),
                          dataset_full %>% filter(period_by_judge_court_all >= 6),
                          get_fe = TRUE)
FE_total6 = obtain_FE(model_total6[[3]], dataset_full %>% filter(period_by_judge_court_all >= 6))

model_first2 = akm_simple('sentence_IHS',
                          controls,
                          c('Court_idP2','Judge_idP','YM'),
                          dataset_full %>% filter(months_judge_court >= 3),
                          get_fe = TRUE)
FE_first2 = obtain_FE(model_first2[[3]],  dataset_full %>% filter(months_judge_court >= 3))

model_firstlast2 = akm_simple('sentence_IHS',
                          controls,
                          c('Court_idP2','Judge_idP','YM'),
                          dataset_full %>% filter(months_judge_court >= 3 & months_judge_court_rev >= 2),
                          get_fe = TRUE)
FE_firstlast2 = obtain_FE(model_firstlast2[[3]], dataset_full %>% filter(months_judge_court >= 3 & months_judge_court_rev >= 2))

dt_FixedEffects_alternative = dataset_full %>% select(Judge_idP) %>% unique(by = 'Judge_idP') %>%
  mutate(Judge_idP = as.character(Judge_idP)) %>% 
  left_join(FE_spell4%>% select(Judge_idP, Judge_FE) %>% unique(by = 'Judge_idP') %>% 
              rename(Judge_FE_spell4 = Judge_FE), 
            by = c('Judge_idP')) %>% 
  left_join(FE_total6%>% select(Judge_idP, Judge_FE) %>% unique(by = 'Judge_idP') %>% 
              rename(Judge_FE_total6 = Judge_FE), 
            by = c('Judge_idP')) %>% 
  left_join(FE_first2%>% select(Judge_idP, Judge_FE) %>% unique(by = 'Judge_idP') %>% 
              rename(Judge_FE_first2 = Judge_FE), 
            by = c('Judge_idP')) %>% 
  left_join(FE_firstlast2%>% select(Judge_idP, Judge_FE) %>% unique(by = 'Judge_idP') %>% 
              rename(Judge_FE_firstlast2 = Judge_FE), 
            by = c('Judge_idP'))

fwrite(dt_FixedEffects_alternative,
       file = paste0(path, "temp/Intermed_FE_Alternative_0.csv"))

#####Robustness - drop initial and last months
samples2 = list(dataset_full %>% filter(months_judge_court >= 3),
               dataset_full %>% filter(months_judge_court >= 3 & months_judge_court_rev >= 2))
FE_restricted =future_map(samples2, ~akm_simple('sentence_IHS',
                                               controls,
                                               c('Court_idP2','Judge_idP','YM'),
                                               dt =.,
                                               get_fe = TRUE),
                          .progress = TRUE)
dt_FixedEffects_robust = dataset_full %>% select(Judge_idP) %>% unique(by = 'Judge_idP') %>%
  left_join(obtain_Judge_FE(FE_restricted[[1]][[3]]) %>% 
              rename_with(~paste0(.,"_first2"),-c('Judge_idP')),
            by = c('Judge_idP')) %>%
  left_join(obtain_Judge_FE(FE_restricted[[2]][[3]]) %>% 
              rename_with(~paste0(.,"_firstlast2"),-c('Judge_idP')),
            by = c('Judge_idP'))

fwrite(dt_FixedEffects_robust,
       file = paste0(path, "temp/Intermed_FE_robust.csv"))


####### CHECKS
#4. Using Hearings as LHS variable ----
model_JudgeCourt = akm_simple('sentence_IHS',
                              controls,
                              c('Court_idP2','Judge_idP','YM'),
                              dataset_estimate,
                              get_fe = TRUE)
FE_Baseline = obtain_FE(model_JudgeCourt[[3]], dataset_estimate)
model_JudgeCourt_original = akm_simple('sentence_IHS',
                              c("total_courts_overall", "total_judges_overall"),
                              c('Court_idP2','Judge_idP','YM'),
                              dataset_estimate,
                              get_fe = TRUE)
FE_Baseline_original = obtain_FE(model_JudgeCourt_original[[3]], dataset_estimate)

model_JudgeCourt_flex = akm_simple('sentence_IHS',
                                       c("total_courts_overall", "total_judges_overall",
                                         "months_judge_court", "months_sqr"  ),
                                       c('Court_idP2','Judge_idP','YM'),
                                       dataset_estimate %>% mutate(months_sqr = months_judge_court^2),
                                       get_fe = TRUE)
FE_Baseline_flex = obtain_FE(model_JudgeCourt_flex[[3]], dataset_estimate)

model_flex_drop = akm_simple('sentence_IHS',
                                   c("total_courts_overall", "total_judges_overall",
                                     "months_judge_court", "months_sqr"  ),
                                   c('Court_idP2','Judge_idP','YM'),
                                   dataset_estimate %>% mutate(months_sqr = months_judge_court^2) %>% 
                                     filter(months_judge_court >= 4),
                                   get_fe = TRUE)
FE_Baseline_flexdrop = obtain_FE(model_flex_drop[[3]], dataset_estimate)
model_flex_drop2 = akm_simple('sentence_IHS',
                             c("total_courts_overall", "total_judges_overall"),
                             c('Court_idP2','Judge_idP','YM', 'months_judge_court'),
                             dataset_estimate %>% filter(months_judge_court >= 4),
                             get_fe = TRUE)
FE_Baseline_flexdrop2 = obtain_FE(model_flex_drop2[[3]], dataset_estimate)
model_base_drop = akm_simple('sentence_IHS',
                              c("total_courts_overall", "total_judges_overall"),
                              c('Court_idP2','Judge_idP','YM'),
                              dataset_estimate %>% filter(months_judge_court >= 4),
                              get_fe = TRUE)
FE_Baseline_drop = obtain_FE(model_base_drop[[3]], dataset_estimate)

dt_FixedEffects = dataset_estimate %>% select(Judge_idP) %>% 
  unique(by = 'Judge_idP') %>%
  mutate(Judge_idP = as.character(Judge_idP)) %>% 
  left_join(FE_Baseline %>% select(Judge_idP, Judge_FE) %>% unique(by = 'Judge_idP') %>%
              rename_with(~paste0(.,"_baseline_new"),-c('Judge_idP')),
            by = c('Judge_idP')) %>%
  left_join(FE_Baseline_original %>% select(Judge_idP, Judge_FE) %>% unique(by = 'Judge_idP') %>%
              rename_with(~paste0(.,"_baseline_original"),-c('Judge_idP')),
            by = c('Judge_idP')) %>%
  left_join(FE_Baseline_flex  %>% select(Judge_idP, Judge_FE) %>% unique(by = 'Judge_idP') %>%
              rename_with(~paste0(.,"_baseline_flex"),-c('Judge_idP')),
            by = c('Judge_idP')) %>% 
  left_join(FE_Baseline_flexdrop  %>% select(Judge_idP, Judge_FE) %>% unique(by = 'Judge_idP') %>%
              rename_with(~paste0(.,"_baseline_flexdrop"),-c('Judge_idP')),
            by = c('Judge_idP')) %>% 
  left_join(FE_Baseline_flexdrop2  %>% select(Judge_idP, Judge_FE) %>% unique(by = 'Judge_idP') %>%
              rename_with(~paste0(.,"_baseline_flexdrop2"),-c('Judge_idP')),
            by = c('Judge_idP')) %>% 
  left_join(FE_Baseline_drop  %>% select(Judge_idP, Judge_FE) %>% unique(by = 'Judge_idP') %>%
              rename_with(~paste0(.,"_baseline_baselinedrop"),-c('Judge_idP')),
            by = c('Judge_idP')) 

fwrite(dt_FixedEffects,
       file = paste0(path, "temp/FE_newtests.csv"))

